library(foreign)
library(tidyverse)
library(scales)


###############################################################
# Figure 2:  The Effects of Postal Voting on the Probability of 
#            Turnout in Referendums (Individual-level Data)
##############################################################

data <- read.dta("./marginal_effects/me_postalmarginal_all.dta")

label_group=data$group_description[data$indi_group_descr==1]
breaks_group=data$id[data$indi_group_descr==1]

label_group_members=data$group_member_descr[data$indi_group_member_descr==1]
breaks_group_members=data$id[data$indi_group_member_descr==1]

data$variable_num <- factor(data$variable_num,
                            labels = label_group)

data$title[data$group_member_descr=="Rightist parties"] <- "plain"
labeling_left <- data$title


ggplot(data, aes(y = id, x = beta, xmin = beta - 1.96*sqrt(beta_cov), xmax=beta + 1.96* sqrt(beta_cov))) +
  geom_vline(xintercept=0, linetype=="dash") +
  geom_point(size=3.8) + 
  geom_errorbarh(height=0.8)+
  facet_grid(variable_num ~ ., scales = "free", space = "free") +
  theme_bw(base_size = 32)  + 
  ylab("") + xlab("\n Effect of Postal Voting (Probability Change)") +
  theme(strip.text.y = element_text(angle=0)) +
  scale_x_continuous(limits = c(-.1, 0.2)) +
  scale_y_continuous(breaks=breaks_group_members,labels=label_group_members) +
  scale_shape_manual(values=c(1,16),name = "",labels = c("No Postal Voting","Postal Voting")) +
  theme(legend.direction = "horizontal", legend.position = "bottom") +
  theme(panel.grid.minor.y =  element_line(colour = "white", size = 0.5,linetype="dotted")) +
  theme(legend.key = element_blank(), strip.background = element_rect(colour="black", fill="grey90" ) ) + 
  theme(panel.margin = unit(0.8, "lines"))+ 
  theme(strip.background = element_blank(), strip.text = element_blank()) +
  theme(axis.text.y = element_text(face=c("bold","plain","plain","plain","plain","plain","plain")))


ggsave(file="Figure2.pdf",width=15,height=18 )



# Check whether these estimates are statistically significant from each other (wp p.13-15)

GetGroupBetas <- function(data,groupname){
  return(data[data$variable==groupname,])
}

Comparison <- function(data){
  datanew <- data[is.na(data$beta)==F,]
  dataout <- data.frame()
  for (i in 1:max(data$group)){
    for (j in i:max(as.numeric(data$group))){
      if (i!=j){
        tvalue <- (datanew$beta[datanew$group==i]-datanew$beta[datanew$group==j])/sqrt(datanew$beta_cov[datanew$group==i]+datanew$beta_cov[datanew$group==j])
        pvalue <- 2*pnorm(-abs(tvalue))
        dataloop <- data.frame(group1=i,group2=j,beta1=datanew$beta[datanew$group==i],beta2=datanew$beta[datanew$group==j],tvalue=tvalue,pvalue=pvalue)
        dataout <- rbind(dataout,dataloop)
      }
      else{}
    }
  }
  return(dataout)
}

Comparisons <- function(datain){
  dataout <- data.frame()
  for (i in levels(as.factor(datain$variable))){
    data_comparison <- GetGroupBetas(datain,i)
    dataloop <- Comparison(data_comparison)
    dataloop$variable <- i
    dataout <- rbind(dataout,dataloop)
  }
  return(dataout)
}

DataInput <- GetGroupBetas(data,"group_income")
Comparison(DataInput)
DataInput <- GetGroupBetas(data,"group_educ")
Comparison(DataInput)
DataInput <- GetGroupBetas(data,"group_interest")
Comparison(DataInput)

##############################################################
# Figure 3: The Composition of Turnout With and Without Postal 
#            Voting (Individual-level Data)
##############################################################
source("summarySE.R")

data1 <- read.dta("./shares/Nobs_all.dta") 
data2 <- read.dta("./marginal_effects/me_postal_all.dta")

data2 %>% select(variable_num,group,beta,postal) %>% 
  arrange(variable_num, group)


data <- merge(data1, data2, by=c("variable","group"))


# (ii) Compute turnout share of different subgroups

data$share_turnout<- data$share*data$beta


dfc <- summarySE(data, measurevar=c("share_turnout"),groupvars=c("variable","postal")) # take mean of share_turnout and multiply it by number of group members to get sum (below)

dfc$share_turnout_sum<-  dfc$share_turnout*dfc$N

data <- merge(data, dfc, by=c("variable","postal"))
data$share_out <- data$share_turnout.x/data$share_turnout_sum

data %>% select(variable_num.x,share_out,group,postal) %>% 
  arrange(variable_num.x, group)


# (iii) get share in overall population and rbind it to original file


data3 <- data[ data$postal==1,] 
data3$share_out <- data3$share 
data3$postal <- 0

data<- rbind(data,data3)



# (iv) define y scale

data$yscale <- matrix(NA,length(data$postal),1)
data$yscale[data$postal==0] <- (data$var_sort.x[data$postal==0] -1)*3+1
data$yscale[data$postal==1] <- (data$var_sort.x[data$postal==1] -1)*3+2
data$yscale[data$postal==2] <- (data$var_sort.x[data$postal==2] -1)*3+3


data$var_sort.x <- max(data$var_sort.x)-data$var_sort.x+1
data$yscale <- max(data$yscale)-data$yscale+1

cbind(data$variable,data$postal,data$var_sort.x,data$yscale)




# (v) Include group title

data$postal <- data$postal+1

df_new <- data[data$postal==1,]
df_new$postal<-0
df_new$share_out <-NA

yscale_breaks <- c(1:(max(df_new$yscale)+(max(df_new$yscale)/2)))

for (i in 1:max(data$variable_num.x)){
  df_new$group_description.x<-data$group_description.y[data$yscale==i*3][1]
}  

data<- rbind(data,df_new)


data <-   data[order(data$variable_num.x, data$postal),] 
data$yscale=data$var_sort.x*4-data$postal





# (vi) prepare labeling and positioning of group shares


for (i in 1:max(data$variable_num.x)){
  for (j in 1:length(data$share[data$variable_num.x==i& data$postal==1])){
    if(j==1){
      data$xpos[data$variable_num.x==i& data$postal==1& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==1])/2
      data$xpos[data$variable_num.x==i& data$postal==2& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==2])/2
      data$xpos[data$variable_num.x==i& data$postal==0& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==0])/2
      data$xpos[data$variable_num.x==i& data$postal==3& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==3])/2
      
    }
    else{
      data$xpos[data$variable_num.x==i& data$postal==1& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==1])+data$share_out[data$variable_num.x==i& data$group==j & data$postal==1]/2
      data$xpos[data$variable_num.x==i& data$postal==2& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==2])+data$share_out[data$variable_num.x==i& data$group==j & data$postal==2]/2  
      data$xpos[data$variable_num.x==i& data$postal==3& data$group==j]  <- sum(data$share_out[data$variable_num.x==i& is.element(data$group,c(1:(j-1))) & data$postal==3])+data$share_out[data$variable_num.x==i& data$group==j & data$postal==3]/2  
      
    }
  }
}  




# (vii) Add geom_text for group names and group shares

data$label[data$group==1] <- paste(data$group_member_descr.x[data$group==1]," (" ,round(data$share_out[data$group==1]*100,0),"%)",sep="")
data$label[data$group==2] <- paste(data$group_member_descr.x[data$group==2]," (" ,round(data$share_out[data$group==2]*100,0),"%)",sep="")
data$label[data$group==3] <- paste(data$group_member_descr.x[data$group==3]," (" ,round(data$share_out[data$group==3]*100,0),"%)",sep="")
data$label[data$group==4] <- paste(data$group_member_descr.x[data$group==4]," (" ,round(data$share_out[data$group==4]*100,0),"%)",sep="")
data$label[data$group==5] <- paste(data$group_member_descr.x[data$group==5]," (" ,round(data$share_out[data$group==5]*100,0),"%)",sep="")




# (vii) labeling of y scale

yscale_breaks <- rev(matrix(1:(length(data$var_sort.x[data$indi_group_descr.x==1 & data$postal==1])*4)))
yscale_labels <- matrix(NA,length(data$var_sort.x[data$indi_group_descr.x==1 & data$postal==1])*4,1)
yscale_labels[seq(1,45,4)] <- data$group_description.y[data$indi_group_descr.x==1 & data$postal==1]
yscale_labels[seq(2,46,4)]<- "Population"  
yscale_labels[seq(3,47,4)]<- "Turnout without PV"
yscale_labels[seq(4,48,4)]<-"Turnout with PV"


# (viii) plot results

data$group <- 3-data$group

data %>% select(yscale,variable,group,label,share_out) %>% arrange(yscale)

ggplot(data, aes(x = yscale, y = share_out,fill=factor(group),shape=factor(postal))) +
  geom_bar(stat='identity',color="black") + 
  geom_text(aes(x= yscale, y = xpos,label=label),size=4.1) +
  coord_flip() +
  theme_bw(base_size = 22)  + 
  xlab("") + ylab("Share in the Electorate") +
  theme(strip.text.y = element_text(angle=0)) +
  scale_y_continuous(labels=percent) +
  scale_x_continuous(breaks= yscale_breaks,labels=yscale_labels) +    
  scale_fill_brewer(guide=F,type='div', palette=5,direction=-1) +
  theme(legend.direction = "horizontal", legend.position = "bottom") +
  theme(panel.grid.minor.y =  element_line(colour = "white", size = 0.5,linetype="dotted")) +
  theme(legend.key = element_blank(), strip.background = element_rect(colour="black", fill="grey90" ) )+ 
  theme(panel.spacing = unit(0.9, "lines"))+
  theme(axis.text.y = element_text(face=c("bold","plain","plain","plain")))


ggsave(file="Figure3.pdf",width=16,height=16 )
